# Required
install.packages("doParallel")
trying URL 'https://cran.rstudio.com/bin/macosx/el-capitan/contrib/3.6/doParallel_1.0.15.tgz'
Content type 'application/x-gzip' length 196281 bytes (191 KB)
==================================================
downloaded 191 KB
The downloaded binary packages are in
/var/folders/hm/2md7sccd0479bw81zsh0yyq80000gn/T//RtmptjktGz/downloaded_packages
library(doParallel)
Loading required package: foreach
Loading required package: iterators
Loading required package: parallel
# Find how many cores are on your machine; the count guides how many
# workers to give the cluster created below.
detectCores() # Result = Typically 4 to 6 (4 on the machine used for this run)
[1] 4
# Start a 2-worker cluster. Leave some cores free: the computer is running
# other processes that still need them.
cl <- parallel::makeCluster(spec = 2)
# Make the cluster the active foreach backend.
doParallel::registerDoParallel(cl = cl)
# Check how many workers foreach will now use.
foreach::getDoParWorkers() # Result 2
[1] 2
# Stop Cluster. After performing your tasks, stop your cluster.
stopCluster(cl)
# Re-register the sequential backend. Without this, foreach (and anything
# built on it, such as caret's model training) still points at the stopped
# workers, and a later %dopar% call fails with an "invalid connection" error.
registerDoSEQ()
install.packages("caret")
trying URL 'https://cran.rstudio.com/bin/macosx/el-capitan/contrib/3.6/caret_6.0-85.tgz'
Content type 'application/x-gzip' length 6264988 bytes (6.0 MB)
==================================================
downloaded 6.0 MB
The downloaded binary packages are in
/var/folders/hm/2md7sccd0479bw81zsh0yyq80000gn/T//RtmptjktGz/downloaded_packages
install.packages("plotly")
trying URL 'https://cran.rstudio.com/bin/macosx/el-capitan/contrib/3.6/plotly_4.9.2.tgz'
Content type 'application/x-gzip' length 2987045 bytes (2.8 MB)
==================================================
downloaded 2.8 MB
The downloaded binary packages are in
/var/folders/hm/2md7sccd0479bw81zsh0yyq80000gn/T//RtmptjktGz/downloaded_packages
library(caret)
Loading required package: lattice
Loading required package: ggplot2
Registered S3 method overwritten by 'dplyr':
method from
print.rowwise_df
Registered S3 method overwritten by 'data.table':
method from
print.data.table
library(plotly)
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Attaching package: ‘plotly’
The following object is masked from ‘package:ggplot2’:
last_plot
The following object is masked from ‘package:stats’:
filter
The following object is masked from ‘package:graphics’:
layout
# Load the large matrix, then count its missing cells (0 means no NAs present).
largeMatrix <- read.csv(file = "LargeMatrix.csv")
sum(is.na(largeMatrix))
[1] 0
# Load the two labeled small matrices (one per handset), then list the
# columns of the iPhone one.
iphone_smallMatrix <- read.csv(file = "iphone_smallmatrix_labeled_8d.csv")
samsung_smallMatrix <- read.csv(file = "galaxy_smallmatrix_labeled_9d.csv")
colnames(iphone_smallMatrix)
[1] "iphone" "samsunggalaxy" "sonyxperia" "nokialumina" "htcphone"
[6] "ios" "googleandroid" "iphonecampos" "samsungcampos" "sonycampos"
[11] "nokiacampos" "htccampos" "iphonecamneg" "samsungcamneg" "sonycamneg"
[16] "nokiacamneg" "htccamneg" "iphonecamunc" "samsungcamunc" "sonycamunc"
[21] "nokiacamunc" "htccamunc" "iphonedispos" "samsungdispos" "sonydispos"
[26] "nokiadispos" "htcdispos" "iphonedisneg" "samsungdisneg" "sonydisneg"
[31] "nokiadisneg" "htcdisneg" "iphonedisunc" "samsungdisunc" "sonydisunc"
[36] "nokiadisunc" "htcdisunc" "iphoneperpos" "samsungperpos" "sonyperpos"
[41] "nokiaperpos" "htcperpos" "iphoneperneg" "samsungperneg" "sonyperneg"
[46] "nokiaperneg" "htcperneg" "iphoneperunc" "samsungperunc" "sonyperunc"
[51] "nokiaperunc" "htcperunc" "iosperpos" "googleperpos" "iosperneg"
[56] "googleperneg" "iosperunc" "googleperunc" "iphonesentiment"
# Column names of the Galaxy dataset, for comparison with the iPhone list.
names(samsung_smallMatrix)
[1] "iphone" "samsunggalaxy" "sonyxperia" "nokialumina" "htcphone"
[6] "ios" "googleandroid" "iphonecampos" "samsungcampos" "sonycampos"
[11] "nokiacampos" "htccampos" "iphonecamneg" "samsungcamneg" "sonycamneg"
[16] "nokiacamneg" "htccamneg" "iphonecamunc" "samsungcamunc" "sonycamunc"
[21] "nokiacamunc" "htccamunc" "iphonedispos" "samsungdispos" "sonydispos"
[26] "nokiadispos" "htcdispos" "iphonedisneg" "samsungdisneg" "sonydisneg"
[31] "nokiadisneg" "htcdisneg" "iphonedisunc" "samsungdisunc" "sonydisunc"
[36] "nokiadisunc" "htcdisunc" "iphoneperpos" "samsungperpos" "sonyperpos"
[41] "nokiaperpos" "htcperpos" "iphoneperneg" "samsungperneg" "sonyperneg"
[46] "nokiaperneg" "htcperneg" "iphoneperunc" "samsungperunc" "sonyperunc"
[51] "nokiaperunc" "htcperunc" "iosperpos" "googleperpos" "iosperneg"
[56] "googleperneg" "iosperunc" "googleperunc" "galaxysentiment"
# Optional spot-check of a single column (currently disabled):
#summary(iphone_smallMatrix$ios)
# Show the structure (dimensions and column types) of the iPhone dataset.
str(iphone_smallMatrix)
'data.frame': 12973 obs. of 59 variables:
$ iphone : int 1 1 1 1 1 41 1 1 1 1 ...
$ samsunggalaxy : int 0 0 0 0 0 0 0 0 0 0 ...
$ sonyxperia : int 0 0 0 0 0 0 0 0 0 0 ...
$ nokialumina : int 0 0 0 0 0 0 0 0 0 0 ...
$ htcphone : int 0 0 0 0 0 0 0 0 0 0 ...
$ ios : int 0 0 0 0 0 6 0 0 0 0 ...
$ googleandroid : int 0 0 0 0 0 0 0 0 0 0 ...
$ iphonecampos : int 0 0 0 0 0 1 1 0 0 0 ...
$ samsungcampos : int 0 0 0 0 0 0 0 0 0 0 ...
$ sonycampos : int 0 0 0 0 0 0 0 0 0 0 ...
$ nokiacampos : int 0 0 0 0 0 0 0 0 0 0 ...
$ htccampos : int 0 0 0 0 0 0 0 0 0 0 ...
$ iphonecamneg : int 0 0 0 0 0 3 1 0 0 0 ...
$ samsungcamneg : int 0 0 0 0 0 0 0 0 0 0 ...
$ sonycamneg : int 0 0 0 0 0 0 0 0 0 0 ...
$ nokiacamneg : int 0 0 0 0 0 0 0 0 0 0 ...
$ htccamneg : int 0 0 0 0 0 0 0 0 0 0 ...
$ iphonecamunc : int 0 0 0 0 0 7 1 0 0 0 ...
$ samsungcamunc : int 0 0 0 0 0 0 0 0 0 0 ...
$ sonycamunc : int 0 0 0 0 0 0 0 0 0 0 ...
$ nokiacamunc : int 0 0 0 0 0 0 0 0 0 0 ...
$ htccamunc : int 0 0 0 0 0 0 0 0 0 0 ...
$ iphonedispos : int 0 0 0 0 0 1 13 0 0 0 ...
$ samsungdispos : int 0 0 0 0 0 0 0 0 0 0 ...
$ sonydispos : int 0 0 0 0 0 0 0 0 0 0 ...
$ nokiadispos : int 0 0 0 0 0 0 0 0 0 0 ...
$ htcdispos : int 0 0 0 0 0 0 0 0 0 0 ...
$ iphonedisneg : int 0 0 0 0 0 3 10 0 0 0 ...
$ samsungdisneg : int 0 0 0 0 0 0 0 0 0 0 ...
$ sonydisneg : int 0 0 0 0 0 0 0 0 0 0 ...
$ nokiadisneg : int 0 0 0 0 0 0 0 0 0 0 ...
$ htcdisneg : int 0 0 0 0 0 0 0 0 0 0 ...
$ iphonedisunc : int 0 0 0 0 0 4 9 0 0 0 ...
$ samsungdisunc : int 0 0 0 0 0 0 0 0 0 0 ...
$ sonydisunc : int 0 0 0 0 0 0 0 0 0 0 ...
$ nokiadisunc : int 0 0 0 0 0 0 0 0 0 0 ...
$ htcdisunc : int 0 0 0 0 0 0 0 0 0 0 ...
$ iphoneperpos : int 0 1 0 1 1 0 5 3 0 0 ...
$ samsungperpos : int 0 0 0 0 0 0 0 0 0 0 ...
$ sonyperpos : int 0 0 0 0 0 0 0 0 0 0 ...
$ nokiaperpos : int 0 0 0 0 0 0 0 0 0 0 ...
$ htcperpos : int 0 0 0 0 0 0 0 0 0 0 ...
$ iphoneperneg : int 0 0 0 0 0 0 4 1 0 0 ...
$ samsungperneg : int 0 0 0 0 0 0 0 0 0 0 ...
$ sonyperneg : int 0 0 0 0 0 0 0 0 0 0 ...
$ nokiaperneg : int 0 0 0 0 0 0 0 0 0 0 ...
$ htcperneg : int 0 0 0 0 0 0 0 0 0 0 ...
$ iphoneperunc : int 0 0 0 1 0 0 5 0 0 0 ...
$ samsungperunc : int 0 0 0 0 0 0 0 0 0 0 ...
$ sonyperunc : int 0 0 0 0 0 0 0 0 0 0 ...
$ nokiaperunc : int 0 0 0 0 0 0 0 0 0 0 ...
$ htcperunc : int 0 0 0 0 0 0 0 0 0 0 ...
$ iosperpos : int 0 0 0 0 0 0 0 0 0 0 ...
$ googleperpos : int 0 0 0 0 0 0 0 0 0 0 ...
$ iosperneg : int 0 0 0 0 0 0 0 0 0 0 ...
$ googleperneg : int 0 0 0 0 0 0 0 0 0 0 ...
$ iosperunc : int 0 0 0 0 0 0 0 0 0 0 ...
$ googleperunc : int 0 0 0 0 0 0 0 0 0 0 ...
$ iphonesentiment: int 0 0 0 0 0 4 4 0 0 0 ...
# Same structure check for the Galaxy dataset.
str(samsung_smallMatrix)
'data.frame': 12911 obs. of 59 variables:
$ iphone : int 1 1 1 0 1 2 1 1 4 1 ...
$ samsunggalaxy : int 0 0 1 0 0 0 0 0 0 0 ...
$ sonyxperia : int 0 0 0 0 0 0 0 0 0 0 ...
$ nokialumina : int 0 0 0 0 0 0 0 0 0 0 ...
$ htcphone : int 0 0 0 1 0 0 0 0 0 0 ...
$ ios : int 0 0 0 0 0 0 0 0 0 0 ...
$ googleandroid : int 0 0 0 0 0 0 0 0 0 0 ...
$ iphonecampos : int 0 0 1 0 0 1 0 0 0 0 ...
$ samsungcampos : int 0 0 1 0 0 0 0 0 0 0 ...
$ sonycampos : int 0 0 0 0 0 0 0 0 0 0 ...
$ nokiacampos : int 0 0 0 0 0 0 0 0 0 0 ...
$ htccampos : int 0 0 0 0 0 0 0 0 0 0 ...
$ iphonecamneg : int 0 0 0 0 0 0 0 0 0 0 ...
$ samsungcamneg : int 0 0 0 0 0 0 0 0 0 0 ...
$ sonycamneg : int 0 0 0 0 0 0 0 0 0 0 ...
$ nokiacamneg : int 0 0 0 0 0 0 0 0 0 0 ...
$ htccamneg : int 0 0 0 0 0 0 0 0 0 0 ...
$ iphonecamunc : int 0 0 0 0 0 0 0 0 0 0 ...
$ samsungcamunc : int 0 0 0 0 0 0 0 0 0 0 ...
$ sonycamunc : int 0 0 0 0 0 0 0 0 0 0 ...
$ nokiacamunc : int 0 0 0 0 0 0 0 0 0 0 ...
$ htccamunc : int 0 0 0 0 0 0 0 0 0 0 ...
$ iphonedispos : int 0 1 0 0 0 0 2 0 0 0 ...
$ samsungdispos : int 0 0 0 0 0 0 0 0 0 0 ...
$ sonydispos : int 0 0 0 0 0 0 0 0 0 0 ...
$ nokiadispos : int 0 0 0 0 0 0 0 0 0 0 ...
$ htcdispos : int 0 0 0 1 0 0 0 0 0 0 ...
$ iphonedisneg : int 0 1 0 0 0 0 0 0 0 0 ...
$ samsungdisneg : int 0 0 0 0 0 0 0 0 0 0 ...
$ sonydisneg : int 0 0 0 0 0 0 0 0 0 0 ...
$ nokiadisneg : int 0 0 0 0 0 0 0 0 0 0 ...
$ htcdisneg : int 0 0 0 0 0 0 0 0 0 0 ...
$ iphonedisunc : int 0 1 0 0 0 0 0 0 0 0 ...
$ samsungdisunc : int 0 0 0 0 0 0 0 0 0 0 ...
$ sonydisunc : int 0 0 0 0 0 0 0 0 0 0 ...
$ nokiadisunc : int 0 0 0 0 0 0 0 0 0 0 ...
$ htcdisunc : int 0 0 0 1 0 0 0 0 0 0 ...
$ iphoneperpos : int 0 0 0 0 0 0 0 0 0 0 ...
$ samsungperpos : int 0 0 0 0 0 0 0 0 0 0 ...
$ sonyperpos : int 0 0 0 0 0 0 0 0 0 0 ...
$ nokiaperpos : int 0 0 0 0 0 0 0 0 0 0 ...
$ htcperpos : int 0 0 0 1 0 0 0 0 0 0 ...
$ iphoneperneg : int 0 0 0 0 0 0 0 0 0 0 ...
$ samsungperneg : int 0 0 0 0 0 0 0 0 0 0 ...
$ sonyperneg : int 0 0 0 0 0 0 0 0 0 0 ...
$ nokiaperneg : int 0 0 0 0 0 0 0 0 0 0 ...
$ htcperneg : int 0 0 0 1 0 0 0 0 0 0 ...
$ iphoneperunc : int 0 0 0 0 0 0 0 0 0 0 ...
$ samsungperunc : int 0 0 0 0 0 0 0 0 0 0 ...
$ sonyperunc : int 0 0 0 0 0 0 0 0 0 0 ...
$ nokiaperunc : int 0 0 0 0 0 0 0 0 0 0 ...
$ htcperunc : int 0 0 0 1 0 0 0 0 0 0 ...
$ iosperpos : int 0 0 0 0 0 0 0 0 0 0 ...
$ googleperpos : int 0 0 0 0 0 0 0 0 0 0 ...
$ iosperneg : int 0 0 0 0 0 0 0 0 0 0 ...
$ googleperneg : int 0 0 0 0 0 0 0 0 0 0 ...
$ iosperunc : int 0 0 0 0 0 0 0 0 0 0 ...
$ googleperunc : int 0 0 0 0 0 0 0 0 0 0 ...
$ galaxysentiment: int 5 3 3 0 1 0 3 5 5 5 ...
# Count NA cells in the iPhone dataset; a result of 0 means no missing values.
sum(is.na(iphone_smallMatrix))
[1] 0
# Count NA cells in the Galaxy dataset; a result of 0 means no missing values.
sum(is.na(samsung_smallMatrix))
[1] 0
# Per-column totals for the iPhone dataset. A total of 0 would mean the column
# is empty (all zeros); in this run every total is positive, so there are no
# empty columns.
colSums(iphone_smallMatrix)
iphone samsunggalaxy sonyxperia nokialumina htcphone ios
27867 923 312 30 1779 1976
googleandroid iphonecampos samsungcampos sonycampos nokiacampos htccampos
514 3757 697 129 89 1469
iphonecamneg samsungcamneg sonycamneg nokiacamneg htccamneg iphonecamunc
3044 710 30 80 1206 3289
samsungcamunc sonycamunc nokiacamunc htccamunc iphonedispos samsungdispos
243 42 47 611 9304 819
sonydispos nokiadispos htcdispos iphonedisneg samsungdisneg sonydisneg
148 129 2051 7894 856 161
nokiadisneg htcdisneg iphonedisunc samsungdisunc sonydisunc nokiadisunc
103 1509 6550 352 59 64
htcdisunc iphoneperpos samsungperpos sonyperpos nokiaperpos htcperpos
812 7854 803 112 138 1660
iphoneperneg samsungperneg sonyperneg nokiaperneg htcperneg iphoneperunc
6313 832 83 133 1562 3880
samsungperunc sonyperunc nokiaperunc htcperunc iosperpos googleperpos
287 30 72 832 768 500
iosperneg googleperneg iosperunc googleperunc iphonesentiment
747 755 429 209 48318
# Per-column totals for the Galaxy dataset (a total of 0 would mean an empty column).
colSums(samsung_smallMatrix)
iphone samsunggalaxy sonyxperia nokialumina htcphone ios
28525 921 311 30 1775 1972
googleandroid iphonecampos samsungcampos sonycampos nokiacampos htccampos
513 3733 693 125 89 1461
iphonecamneg samsungcamneg sonycamneg nokiacamneg htccamneg iphonecamunc
3027 710 30 80 1202 3278
samsungcamunc sonycamunc nokiacamunc htccamunc iphonedispos samsungdispos
241 40 47 608 9251 814
sonydispos nokiadispos htcdispos iphonedisneg samsungdisneg sonydisneg
143 129 2039 7849 856 161
nokiadisneg htcdisneg iphonedisunc samsungdisunc sonydisunc nokiadisunc
103 1501 6519 351 58 64
htcdisunc iphoneperpos samsungperpos sonyperpos nokiaperpos htcperpos
810 7786 801 110 138 1653
iphoneperneg samsungperneg sonyperneg nokiaperneg htcperneg iphoneperunc
6269 832 83 133 1559 3847
samsungperunc sonyperunc nokiaperunc htcperunc iosperpos googleperpos
285 28 72 829 768 500
iosperneg googleperneg iosperunc googleperunc galaxysentiment
747 755 429 209 49430
There are multiple phones in this dataset.
# Histogram of the sentiment label in each dataset. Columns are referenced by
# bare name inside the formula: plot_ly() looks them up in the data frame
# passed as its first argument, so repeating `data$` is unnecessary and only
# clutters the default axis title.
plot_ly(iphone_smallMatrix, x = ~iphonesentiment, type = "histogram")
plot_ly(samsung_smallMatrix, x = ~galaxysentiment, type = "histogram")
NA
0: very negative
1: negative
2: somewhat negative
3: somewhat positive
4: positive
5: very positive
Interesting.
We have two datasets: iPhone and Samsung Galaxy. Both contain the same predictor variables and differ only in the label column: “iphonesentiment” in the iPhone dataset and “galaxysentiment” in the Galaxy dataset.
We already have too many unnecessary predictors, including those for Google, Nokia, HTC, and Sony phones. We are only interested in the Samsung Galaxy and the iPhone.
# Alphabetical listing, so columns sharing a brand prefix appear together.
sort(names(iphone_smallMatrix))
[1] "googleandroid" "googleperneg" "googleperpos" "googleperunc" "htccamneg"
[6] "htccampos" "htccamunc" "htcdisneg" "htcdispos" "htcdisunc"
[11] "htcperneg" "htcperpos" "htcperunc" "htcphone" "ios"
[16] "iosperneg" "iosperpos" "iosperunc" "iphone" "iphonecamneg"
[21] "iphonecampos" "iphonecamunc" "iphonedisneg" "iphonedispos" "iphonedisunc"
[26] "iphoneperneg" "iphoneperpos" "iphoneperunc" "iphonesentiment" "nokiacamneg"
[31] "nokiacampos" "nokiacamunc" "nokiadisneg" "nokiadispos" "nokiadisunc"
[36] "nokialumina" "nokiaperneg" "nokiaperpos" "nokiaperunc" "samsungcamneg"
[41] "samsungcampos" "samsungcamunc" "samsungdisneg" "samsungdispos" "samsungdisunc"
[46] "samsunggalaxy" "samsungperneg" "samsungperpos" "samsungperunc" "sonycamneg"
[51] "sonycampos" "sonycamunc" "sonydisneg" "sonydispos" "sonydisunc"
[56] "sonyperneg" "sonyperpos" "sonyperunc" "sonyxperia"
# Keep only the columns whose name contains "iphone", then list what is left.
toFilter <- grepl("iphone", names(iphone_smallMatrix), fixed = TRUE)
iphones <- iphone_smallMatrix[, toFilter, drop = FALSE]
names(iphones)
[1] "iphone" "iphonecampos" "iphonecamneg" "iphonecamunc" "iphonedispos"
[6] "iphonedisneg" "iphonedisunc" "iphoneperpos" "iphoneperneg" "iphoneperunc"
[11] "iphonesentiment"
*Is the ios attribute considered part of iphone? I say no, because if it were, then it would have had ios in front of it.
# correlation matrix
# install.packages("corrplot")  # run once if corrplot is not installed
# corrplot() is called below, so the package has to be attached; with the
# library() call commented out a fresh session fails with
# "could not find function".
library(corrplot)
#options(max.print=1000000)
# cor() on a data frame already returns a numeric matrix, so no as.matrix()
# conversion is needed before plotting.
corr_matrix <- cor(iphones)
# Draw the correlation plot; the value corrplot() returns is kept and printed.
corr_plot <- corrplot(corr_matrix)
corr_plot
iphone iphonecampos iphonecamneg iphonecamunc iphonedispos iphonedisneg
iphone 1.000000000 0.07815733 0.49052359 0.750403174 0.05262462 0.175572621
iphonecampos 0.078157326 1.00000000 0.54133997 0.473266316 0.27258655 0.148650674
iphonecamneg 0.490523588 0.54133997 1.00000000 0.643460020 0.26198314 0.346878956
iphonecamunc 0.750403174 0.47326632 0.64346002 1.000000000 0.20900762 0.253253711
iphonedispos 0.052624621 0.27258655 0.26198314 0.209007616 1.00000000 0.868765387
iphonedisneg 0.175572621 0.14865067 0.34687896 0.253253711 0.86876539 1.000000000
iphonedisunc 0.250929821 0.18831003 0.29907429 0.361321734 0.88302623 0.879950578
iphoneperpos -0.009507666 0.34833242 0.25756896 0.190248578 0.65935383 0.530888336
iphoneperneg 0.013863107 0.15191863 0.30887521 0.113175498 0.63776843 0.640995104
iphoneperunc -0.016037424 0.18725962 0.21757939 0.174433158 0.66523752 0.570044418
iphonesentiment 0.014858654 -0.02973122 -0.08396314 0.001443485 0.01454682 0.003144905
iphonedisunc iphoneperpos iphoneperneg iphoneperunc iphonesentiment
iphone 0.25092982 -0.009507666 0.013863107 -0.01603742 0.014858654
iphonecampos 0.18831003 0.348332416 0.151918629 0.18725962 -0.029731217
iphonecamneg 0.29907429 0.257568960 0.308875213 0.21757939 -0.083963139
iphonecamunc 0.36132173 0.190248578 0.113175498 0.17443316 0.001443485
iphonedispos 0.88302623 0.659353827 0.637768430 0.66523752 0.014546824
iphonedisneg 0.87995058 0.530888336 0.640995104 0.57004442 0.003144905
iphonedisunc 1.00000000 0.554364879 0.564479458 0.62392944 0.027172723
iphoneperpos 0.55436488 1.000000000 0.794832452 0.79182763 0.029637900
iphoneperneg 0.56447946 0.794832452 1.000000000 0.75948372 -0.004804058
iphoneperunc 0.62392944 0.791827630 0.759483720 1.00000000 0.037199859
iphonesentiment 0.02717272 0.029637900 -0.004804058 0.03719986 1.000000000
# keep columns only for iphone and samsunggalaxy